{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "from pandas import Series, DataFrame\n",
    "import pandas as pd\n",
    "from numpy.random import randn\n",
    "import numpy as np\n",
    "pd.options.display.max_rows = 12\n",
    "np.set_printoptions(precision=4, suppress=True)\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rc('figure', figsize=(12, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Date and Time Data Types and Tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "now = datetime.now()\n",
    "now"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "now.year, now.month, now.day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n",
    "delta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "delta.days"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "delta.seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from datetime import timedelta\n",
    "start = datetime(2011, 1, 7)\n",
    "start + timedelta(12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "start - 2 * timedelta(12)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Converting between string and datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp = datetime(2011, 1, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "str(stamp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp.strftime('%Y-%m-%d')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "value = '2011-01-03'\n",
    "datetime.strptime(value, '%Y-%m-%d')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "datestrs = ['7/6/2011', '8/6/2011']\n",
    "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from dateutil.parser import parse\n",
    "parse('2011-01-03')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "parse('Jan 31, 1997 10:45 PM')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "parse('6/12/2011', dayfirst=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "datestrs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.to_datetime(datestrs)\n",
    "# note: output changed (no '00:00:00' anymore)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "idx = pd.to_datetime(datestrs + [None])\n",
    "idx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "idx[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.isnull(idx)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Time Series Basics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),\n",
    "         datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]\n",
    "ts = Series(np.random.randn(6), index=dates)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "type(ts)\n",
    "# note: output changed to \"pandas.core.series.Series\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts + ts[::2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.index.dtype\n",
    "# note: output changed from dtype('datetime64[ns]') to dtype('<M8[ns]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp = ts.index[0]\n",
    "stamp\n",
    "# note: output changed from <Timestamp: 2011-01-02 00:00:00> to Timestamp('2011-01-02 00:00:00')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Indexing, selection, subsetting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp = ts.index[2]\n",
    "ts[stamp]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts['1/10/2011']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts['20110110']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "longer_ts = Series(np.random.randn(1000),\n",
    "                   index=pd.date_range('1/1/2000', periods=1000))\n",
    "longer_ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "longer_ts['2001']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "longer_ts['2001-05']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts[datetime(2011, 1, 7):]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts['1/6/2011':'1/11/2011']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.truncate(after='1/9/2011')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n",
    "long_df = DataFrame(np.random.randn(100, 4),\n",
    "                    index=dates,\n",
    "                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
    "long_df.ix['5-2001']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Time series with duplicate indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',\n",
    "                          '1/3/2000'])\n",
    "dup_ts = Series(np.arange(5), index=dates)\n",
    "dup_ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dup_ts.index.is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dup_ts['1/3/2000']  # not duplicated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dup_ts['1/2/2000']  # duplicated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "grouped = dup_ts.groupby(level=0)\n",
    "grouped.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "grouped.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Date ranges, Frequencies, and Shifting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('D')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generating date ranges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "index = pd.date_range('4/1/2012', '6/1/2012')\n",
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range(start='4/1/2012', periods=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range(end='6/1/2012', periods=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('1/1/2000', '12/1/2000', freq='BM')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('5/2/2012 12:56:31', periods=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Frequencies and Date Offsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from pandas.tseries.offsets import Hour, Minute\n",
    "hour = Hour()\n",
    "hour"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "four_hours = Hour(4)\n",
    "four_hours"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Hour(2) + Minute(30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('1/1/2000', periods=10, freq='1h30min')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Week of month dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')\n",
    "list(rng)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Shifting (leading and lagging) data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts = Series(np.random.randn(4),\n",
    "            index=pd.date_range('1/1/2000', periods=4, freq='M'))\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(-2)"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "ts / ts.shift(1) - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(2, freq='M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(3, freq='D')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(1, freq='3D')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.shift(1, freq='90T')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Shifting dates with offsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from pandas.tseries.offsets import Day, MonthEnd\n",
    "now = datetime(2011, 11, 17)\n",
    "now + 3 * Day()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "now + MonthEnd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "now + MonthEnd(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "offset = MonthEnd()\n",
    "offset.rollforward(now)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "offset.rollback(now)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts = Series(np.random.randn(20),\n",
    "            index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n",
    "ts.groupby(offset.rollforward).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('M', how='mean')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Time Zone Handling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pytz\n",
    "pytz.common_timezones[-5:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tz = pytz.timezone('US/Eastern')\n",
    "tz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Localization and Conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n",
    "ts = Series(np.random.randn(len(rng)), index=rng)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(ts.index.tz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts_utc = ts.tz_localize('UTC')\n",
    "ts_utc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts_utc.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts_utc.tz_convert('US/Eastern')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts_eastern = ts.tz_localize('US/Eastern')\n",
    "ts_eastern.tz_convert('UTC')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts_eastern.tz_convert('Europe/Berlin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.index.tz_localize('Asia/Shanghai')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Operations with time zone-aware Timestamp objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp = pd.Timestamp('2011-03-12 04:00')\n",
    "stamp_utc = stamp.tz_localize('utc')\n",
    "stamp_utc.tz_convert('US/Eastern')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n",
    "stamp_moscow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp_utc.value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp_utc.tz_convert('US/Eastern').value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 30 minutes before DST transition\n",
    "from pandas.tseries.offsets import Hour\n",
    "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n",
    "stamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp + Hour()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 90 minutes before DST transition\n",
    "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n",
    "stamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stamp + 2 * Hour()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Operations between different time zones"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n",
    "ts = Series(np.random.randn(len(rng)), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts1 = ts[:7].tz_localize('Europe/London')\n",
    "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n",
    "result = ts1 + ts2\n",
    "result.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Periods and Period Arithmetic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p = pd.Period(2007, freq='A-DEC')\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p + 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p - 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.Period('2014', freq='A-DEC') - p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')\n",
    "rng"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Series(np.random.randn(6), index=rng)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "values = ['2001Q3', '2002Q2', '2003Q1']\n",
    "index = pd.PeriodIndex(values, freq='Q-DEC')\n",
    "index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Period Frequency Conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p = pd.Period('2007', freq='A-DEC')\n",
    "p.asfreq('M', how='start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p.asfreq('M', how='end')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p = pd.Period('2007', freq='A-JUN')\n",
    "p.asfreq('M', 'start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p.asfreq('M', 'end')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p = pd.Period('Aug-2007', 'M')\n",
    "p.asfreq('A-JUN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.period_range('2006', '2009', freq='A-DEC')\n",
    "ts = Series(np.random.randn(len(rng)), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.asfreq('M', how='start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.asfreq('B', how='end')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Quarterly period frequencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p = pd.Period('2012Q4', freq='Q-JAN')\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p.asfreq('D', 'start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p.asfreq('D', 'end')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n",
    "p4pm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "p4pm.to_timestamp()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n",
    "ts = Series(np.arange(len(rng)), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n",
    "ts.index = new_rng.to_timestamp()\n",
    "ts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Converting Timestamps to Periods (and back)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=3, freq='M')\n",
    "ts = Series(randn(3), index=rng)\n",
    "pts = ts.to_period()\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n",
    "ts2 = Series(randn(6), index=rng)\n",
    "ts2.to_period('M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pts = ts.to_period()\n",
    "pts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pts.to_timestamp(how='end')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating a PeriodIndex from arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = pd.read_csv('ch08/macrodata.csv')\n",
    "data.year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data.quarter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')\n",
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data.index = index\n",
    "data.infl"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Resampling and Frequency Conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n",
    "ts = Series(randn(len(rng)), index=rng)\n",
    "ts.resample('M', how='mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('M', how='mean', kind='period')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Downsampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=12, freq='T')\n",
    "ts = Series(np.arange(12), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('5min', how='sum')\n",
    "# note: output changed (as the default changed from closed='right', label='right' to closed='left', label='left'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('5min', how='sum', closed='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('5min', how='sum', closed='left', label='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('5min', how='sum', loffset='-1s')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Open-High-Low-Close (OHLC) resampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('5min', how='ohlc')\n",
    "# note: output changed because of changed defaults"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Resampling with GroupBy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n",
    "ts = Series(np.arange(100), index=rng)\n",
    "ts.groupby(lambda x: x.month).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.groupby(lambda x: x.weekday).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Upsampling and interpolation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "frame = DataFrame(np.random.randn(2, 4),\n",
    "                  index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),\n",
    "                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_daily = frame.resample('D')\n",
    "df_daily"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "frame.resample('D', fill_method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "frame.resample('D', fill_method='ffill', limit=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "frame.resample('W-THU', fill_method='ffill')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Resampling with periods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "frame = DataFrame(np.random.randn(24, 4),\n",
    "                  index=pd.period_range('1-2000', '12-2001', freq='M'),\n",
    "                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n",
    "frame[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "annual_frame = frame.resample('A-DEC', how='mean')\n",
    "annual_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Q-DEC: Quarterly, year ending in December\n",
    "annual_frame.resample('Q-DEC', fill_method='ffill')\n",
    "# note: output changed, default value changed from convention='end' to convention='start' + 'start' changed to span-like\n",
    "# also the following cells"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "annual_frame.resample('Q-MAR', fill_method='ffill')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Time series plotting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px_all = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)\n",
    "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n",
    "close_px = close_px.resample('B', fill_method='ffill')\n",
    "close_px.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px['AAPL'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px.ix['2009'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px['AAPL'].ix['01-2011':'03-2011'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')\n",
    "appl_q.ix['2009':].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Moving window functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px = close_px.asfreq('B').fillna(method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px.AAPL.plot()\n",
    "pd.rolling_mean(close_px.AAPL, 250).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.figure()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)\n",
    "appl_std250[5:12]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "appl_std250.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Define expanding mean in terms of rolling_mean\n",
    "expanding_mean = lambda x: rolling_mean(x, len(x), min_periods=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.rolling_mean(close_px, 60).plot(logy=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.close('all')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exponentially-weighted functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True,\n",
    "                         figsize=(12, 7))\n",
    "\n",
    "aapl_px = close_px.AAPL['2005':'2009']\n",
    "\n",
    "ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)\n",
    "ewma60 = pd.ewma(aapl_px, span=60)\n",
    "\n",
    "aapl_px.plot(style='k-', ax=axes[0])\n",
    "ma60.plot(style='k--', ax=axes[0])\n",
    "aapl_px.plot(style='k-', ax=axes[1])\n",
    "ewma60.plot(style='k--', ax=axes[1])\n",
    "axes[0].set_title('Simple MA')\n",
    "axes[1].set_title('Exponentially-weighted MA')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Binary moving window functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "close_px\n",
    "spx_px = close_px_all['SPX']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "spx_rets = spx_px / spx_px.shift(1) - 1\n",
    "returns = close_px.pct_change()\n",
    "corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)\n",
    "corr.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)\n",
    "corr.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### User-defined moving window functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from scipy.stats import percentileofscore\n",
    "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n",
    "result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)\n",
    "result.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Performance and Memory Usage Notes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')\n",
    "ts = Series(np.random.randn(len(rng)), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ts.resample('15min', how='ohlc').info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%timeit ts.resample('15min', how='ohlc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rng = pd.date_range('1/1/2000', periods=10000000, freq='1s')\n",
    "ts = Series(np.random.randn(len(rng)), index=rng)\n",
    "%timeit ts.resample('15s', how='ohlc')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
